

################### Nature Paper Figures and Tables ###########################


library(tidyverse)
library(ggthemes)
library(dplyr)
library(ggplot2)
library(tidytext)
library(writexl)
library(readxl)
library(patchwork)


### Set working directory

setwd("/Users/benyoel/Dropbox/DEED_ACE/DEED_RA_2425/Nature Paper/Replication Files")

data = read_csv("DEEDv7_Final_20251216.csv")

data$FH_year <- as.integer(data$FH_year)

### number of countries in the data

num_countries <- dplyr::n_distinct(data$Country)
num_countries

#printing country names
unique(data$Country)

event_counts <- data %>%
  count(Type) %>%
  arrange(desc(n))  
print(event_counts)

### V-Dem DEED comparison

vdem <- read.csv("V-Dem-CY-Full+Others-v15.csv") 
vdem <- vdem %>% 
  rename(Country = country_name, Year_1 = year) %>%
  filter(Year_1 >= 2000 & Year_1 <= 2024) %>% 
  dplyr::select(Country, v2x_libdem, v2x_polyarchy, Year_1,
         v2mecenefm_ord, v2elaccept_ord, e_regionpol_6C, v2x_regime) 


### Renaming countries to ensure match between our data and V-Dem's data.

country_name_mapping <- c(
  "Czech Republic" = "Czechia",
  "North Macedonia" = "Macedonia",
  "Turkey" = "Türkiye"
)


### making sure country names match
vdem <- vdem %>%
  mutate(
    Country = dplyr::recode(Country, !!!country_name_mapping)
  )


data <- data %>%
  mutate(Country = dplyr::recode(Country, !!!country_name_mapping))


### Merging V-Dem with the combined dataset to get liberal democracy,
### Regimes of the World, Polyarchy, and region information. 
final <- left_join(data, vdem, by = c("Country", "Year_1")) %>%
  arrange(Country)

### Recoding regions from numerics into characters.
final <- final %>%
  mutate(e_regionpol_6C = dplyr::recode(e_regionpol_6C,
                                 `1` = "Eastern Europe and Central Asia",
                                 `2` = "Latin America and the Caribbean",
                                 `3` = "Middle East and North Africa",
                                 `4` = "Sub-Saharan Africa",
                                 `5` = "Western Europe and North America",
                                 `6` = "Asia and Pacific"))


### Recoding Regimes of the World from numerics into characters.
final <- final %>%
  mutate(v2x_regime = dplyr::recode(v2x_regime,
                             `0` = "Closed Autocracy",
                             `1` = "Electoral Autocracy",
                             `2` = "Electoral Democracy",
                             `3` = "Liberal Democracy")) 


################### Now preparing materials for FIGURE 1 #####################


# Extract top 5 events for each event type in our dataset (FH and Student Case)
symptoms_combined <- final %>%
  filter(Type == "Symptom") %>%
  count(Category, name = "count") %>%
  arrange(desc(count)) %>%
  slice_max(count, n = 5)

precursors_combined <- final %>%
  filter(Type == "Precursor") %>%
  count(Category, name = "count") %>%
  arrange(desc(count)) %>%
  slice_max(count, n = 5)

resistance_combined <- final %>%
  filter(Type == "Resistance") %>%
  count(Category, name = "count") %>%
  arrange(desc(count)) %>%
  slice_max(count, n = 5)

destabilizing_combined <- final %>%
  filter(Type == "Destabilizing Event") %>%
  count(Category, name = "count") %>%
  arrange(desc(count)) %>%
  slice_max(count, n = 5)

combined_data_top <- bind_rows(
  symptoms_combined %>% mutate(Type = "Symptom"),
  precursors_combined %>% mutate(Type = "Precursor"),
  resistance_combined %>% mutate(Type = "Resistance"),
  destabilizing_combined  %>% mutate(Type = "Destabilizing Event")
)

combined_data_top$Type <- factor(combined_data_top$Type, levels = c("Precursor", 
                                                                    "Symptom", 
                                                                    "Resistance",
                                                                    "Destabilizing Event"))

ggplot(combined_data_top, 
       aes(x = reorder_within(Category, count, Type), y = count, fill = Type)) +
  geom_bar(stat = "identity") +
  facet_wrap(~Type, scales = "free_y", ncol = 1, strip.position = "top") +
  scale_x_reordered() +  
  scale_fill_manual(values = c("Symptom" = "black", 
                               "Precursor" = "darkgray", 
                               "Resistance" = "lightgray", 
                               "Destabilizing Event" = "gray65")) +
  labs(
    title = "",
    x = "",
    y = "Event Counts"
  ) +
  coord_flip() +
  theme_minimal() +
  theme(
    legend.position = "none",
    strip.background = element_blank(),
    strip.text = element_text(face = "bold")
  )

ggsave("DEED_EventCounts_final.png", height = 8, width = 8)


############### CPJ Data Comparison - Technical Validation Section #############

## import CPJ data for journalists killed and journalists imprisoned

cpj_killed <- read_csv("killed.csv") %>%
  dplyr::select(year, country) %>%
  filter(year >= 2000 & year <= 2023) %>%
  group_by(country, year) %>%
  summarize(n_killed = n()) %>%
  mutate(year = as.character(year)) %>%
  ungroup()


cpj_imprisoned <- read_csv("cpj_journalists_imprisoned.csv") %>%
  mutate(year = str_extract(Date, "\\d{4}")) %>%
  dplyr::select(year, Location) %>%
  filter(year >= 2000 & year <= 2019) %>%
  rename("country" = "Location") %>%
  group_by(country, year) %>%
  summarize(n_imprisoned = n()) %>%
  mutate(year = as.character(year)) %>%
  ungroup()


# Create a master list of all unique `country` and `year`
master_keys <- bind_rows(cpj_killed, cpj_imprisoned) %>%
  distinct(country, year) %>%
  arrange(country, year)


cpj <- master_keys %>%
  left_join(cpj_imprisoned, by = c("country", "year")) %>%
  left_join(cpj_killed, by = c("country", "year")) %>%
  rowwise() %>%
  mutate(n = sum(c_across(n_imprisoned : n_killed), na.rm = T)) %>%
  ungroup() %>%
  dplyr::select(country, year, n)  %>%
  mutate(n = ifelse(is.na(n), 0, n)) %>%
  mutate(year = as.double(year))

cpj = cpj %>% 
  rename(Country = country) %>% 
  rename(Year_1 = year)

### DEED media repression event counts 

media_repression_counts <- data %>%
  filter(Category == "Media Repression") %>%
  group_by(Country, Year_1) %>%
  summarise(Media_Repression_Count = n(), .groups = "drop") 

### dataset to merge with every country and every year (to ensure years with no DEED events are captured as 0 in the final merge)

countries <- unique(data$Country)  
years <- unique(data$Year_1)  

country_year_data <- expand.grid(
  Country = countries,
  Year_1 = years)

complete_counts <- country_year_data %>%
  left_join(media_repression_counts, by = c("Country", "Year_1")) %>%
  mutate(Media_Repression_Count = replace_na(Media_Repression_Count, 0))

merged_df_CPJ <- complete_counts %>%
  left_join(cpj, by = c("Country", "Year_1")) %>%
  mutate(n = replace_na(n, 0)) %>%
  mutate(Media_Repression_Group = ifelse(Media_Repression_Count > 0, "At Least One Event", "No Media Repression")) %>%
  mutate(CPJ_Group = ifelse(n > 0, "At Least One CPJ", "No CPJ")) %>%
  filter(Country %in% countries)


table(merged_df_CPJ$CPJ_Group, merged_df_CPJ$Media_Repression_Group)
#                            At Least One Event No Media Repression
#At Least One CPJ                352                 232
# No CPJ                         1124                2036

length(merged_df_CPJ$Year_1[merged_df_CPJ$n>0]) # 584 country-years
352/584 # 60% of years with journalists killed/imprisomed, there is a DEED group 


table(merged_df_CPJ$n, merged_df_CPJ$Media_Repression_Group)
#             At Least One Event No Media Repression
# 0                1124                2036

1124/(1124+2036) # 0.3556962
1124+2036 # == 3160
length(merged_df_CPJ$Year_1[merged_df_CPJ$n==0]) # 3160
### in 36% of cases with no journalists killed or imprisoned, there was a DEED event 




############### Figure 2 in the paper #########################################

## DEED media repression and media bias event counts (categories combined)

deed_media <- final %>%
  filter(Category %in% c("Media Repression","Media Bias")) %>%
  group_by(Country, Year_1) %>%
  summarise(Event_Count = n(), .groups = "drop")


complete_media <- country_year_data %>%
  left_join(deed_media, by = c("Country", "Year_1")) %>%
  left_join(vdem %>% dplyr::select(Country, Year_1, v2mecenefm_ord), by = c("Country", "Year_1")) %>%
  mutate(Event_Count = replace_na(Event_Count, 0)) %>%
  mutate(v2mecenefm_ord = replace_na(v2mecenefm_ord, 0))

#### recode so the VDEM runs higher for more censorship
vdem_media <- complete_media %>%
  mutate(v2mecenefm_ord = 4 - v2mecenefm_ord) %>%
  mutate(Media_Status = ifelse(Event_Count > 0, 
                               "At Least One Event", "No Media Bias or Repression"))


vdem_media <- vdem_media %>%
  mutate(deed01 = ifelse(Event_Count != 0, 1, 0),
         vdem01 = ifelse(v2mecenefm_ord == 0, 1, 0))

table(vdem_media$vdem01, vdem_media$deed01, dnn = c("vdem", "deed"))

vdem_media$v2mecenefm_ord <- factor(vdem_media$v2mecenefm_ord,
                                    levels = 0:4,
                                    labels = c("Rare, and punished",
                                               "Indirect, limited",
                                               "Direct, limited",
                                               "Indirect, routine",
                                               "Direct, routine"))

### recode into only 3 categories 

vdem_media <- vdem_media %>%
  mutate(v2mecenefm_ord_3cats = case_when(
    v2mecenefm_ord == "Rare, and punished" ~ "Rare",
    v2mecenefm_ord == "Indirect, limited" ~ "Limited",
    v2mecenefm_ord == "Direct, limited" ~ "Limited",
    v2mecenefm_ord == "Indirect, routine" ~ "Routine",
    v2mecenefm_ord == "Direct, routine" ~ "Routine"
  ))


censorship.plot.order <- c("Rare", "Limited", "Routine")
vdem_media$v2mecenefm_ord_3cats <- factor(vdem_media$v2mecenefm_ord_3cats, levels = censorship.plot.order)


mediaplot = ggplot(vdem_media, aes(x = v2mecenefm_ord_3cats, y = Event_Count)) +
  stat_summary(fun = mean, geom = "bar") +
  labs(x = "Government Censorship Effort - Media (VDEM)",
       y = "Mean Media Bias and Repression Events (DEED)") +
  theme_minimal()

mediaplot

ggsave("mediaplot_final.png", height = 8, width = 8)


################### Figure 3  #############################


event_data <- final %>%
  filter(Type %in% c("Symptom", "Precursor", "Resistance", "Destabilizing Event"), 
         !is.na(v2x_regime), 
         !(Type == "Destabilizing Event" & v2x_regime %in% c("Liberal Democracy", "Electoral Democracy"))) %>%
  mutate(Type = recode(Type, "Destabilizing Event" = "Destabilizing")) %>%
  count(v2x_regime, Type)  

ggplot(event_data, aes(x = Type, y = n, fill = Type)) +
  geom_col(position = "dodge") +  
  scale_fill_manual(values = c("Destabilizing" = "red",
                               "Precursor" = "gray45",  
                               "Symptom" = "gray78", 
                               "Resistance" = "gray6")) +  
  labs(title = "",
       x = "",
       y = "Event Counts") +
  theme_minimal() +
  facet_wrap(~ v2x_regime, scales = "fixed") +
  theme(legend.position = "right", plot.title = element_text(hjust = 0.5))

ggsave("typecount_byregime_new.png", height = 8, width = 8)


######################### Appendix plots #######################################


### Figure A1 - bar plot of event counts across all years


filtered_data <- final %>% 
  filter(Year_1 >= 2000 & Year_1 <= 2024) %>% 
  filter(!is.na(Type)) %>%
  group_by(Year_1, Type) %>% 
  summarise(count = n(), .groups = 'drop')

ggplot(filtered_data, aes(x = Year_1, y = count, fill = Type)) + 
  geom_bar(stat = "identity", position = "dodge") + 
  scale_fill_manual(values = c("Symptom" = "black",  
                               "Precursor" = "lightgray",  
                               "Resistance" = "darkgray",  
                               "Destabilizing Event" = "red")) + 
  labs(
    title = "",  
    x = "Year",  
    y = "Event Counts",  
    fill = "Event Category"  ) + 
  theme_minimal() + 
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),  
    legend.position = "bottom"  
  )

ggsave("total_counts_new2025.png", height = 8, width = 8)


### Figure A2 - time series for each of the world regions in RoW 

##making sure all regions are correct
final <- final %>%
  mutate(
    e_regionpol_6C = case_when(
      Country == "Czech Republic" ~ "Eastern Europe and Central Asia",  
      Country == "Turkey" ~ "Middle East and North Africa", 
      Country == "North Macedonia" ~ "Eastern Europe and Central Asia",
      Country == "Palestine" ~ "Middle East and North Africa",
      Country == "South Sudan" ~ "Sub-Saharan Africa",
      Country == "Brunei" ~ "Asia and Pacific",
      Country == "Belize" ~ "Latin America and the Caribbean",
      TRUE ~ e_regionpol_6C  
    )
  )

final <- final %>% 
  filter(Year_1 <= 2024)


time_series_data <- final %>%
  group_by(Year_1, e_regionpol_6C) %>%
  filter(!is.na(e_regionpol_6C)) %>%    
  summarize(total_events = n(), .groups = "drop")

ggplot(time_series_data, aes(x = Year_1, y = total_events, color = e_regionpol_6C)) +
  geom_line(linewidth = 1) +  
  labs(title = "",
       x = "Year",
       y = "Event Counts",
       color = "Region") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

ggsave("time_series_nov2025.png", height = 8, width = 8)

##################################################################
### Figure A3 and A4 - comparing student case study and FH

cleandata <- read_excel("compiled_clean_keepduplicates.xlsx")

cleandata <- cleandata %>%
  mutate(Country = recode(Country, !!!country_name_mapping))

#just triple checking it's an integer
cleandata$Year_1 <- as.integer(cleandata$Year_1)

### Merging V-Dem with the combined dataset to get liberal democracy,
### Regimes of the World, Polyarchy, and region information. 
final <- left_join(cleandata, vdem, by = c("Country", "Year_1")) %>%
  arrange(Country)

### Recoding regions from numerics into characters.
final <- final %>%
  mutate(e_regionpol_6C = recode(e_regionpol_6C,
                                 `1` = "Eastern Europe and Central Asia",
                                 `2` = "Latin America and the Caribbean",
                                 `3` = "Middle East and North Africa",
                                 `4` = "Sub-Saharan Africa",
                                 `5` = "Western Europe and North America",
                                 `6` = "Asia and Pacific"))


### Recoding Regimes of the World from numerics into characters.
final <- final %>%
  mutate(v2x_regime = recode(v2x_regime,
                             `0` = "Closed Autocracy",
                             `1` = "Electoral Autocracy",
                             `2` = "Electoral Democracy",
                             `3` = "Liberal Democracy")) 

final <- final %>%
  mutate(
    v2x_regime = case_when(
      Country %in% c("Brunei", "South Sudan") ~ "Closed Autocracy",
      Country %in% c("Belize", "Antigua and Barbuda", "Andorra", "St Kitts and Nevis") ~ "Liberal Democracy",
      TRUE ~ v2x_regime
    )
  )

final <- final %>%
  mutate(
    v2x_regime = case_when(
      Country == "Palestine" & Year_1 >= 2000 & Year_1 <= 2002 ~ "Electoral Autocracy",
      Country == "Palestine" & Year_1 >= 2003 & Year_1 <= 2006 ~ "Electoral Democracy",
      Country == "Palestine" & Year_1 > 2006 ~ "Electoral Autocracy",
      TRUE ~ v2x_regime
    )
  )

############################## Student Case data ###############################

studentcase <- final %>%
  filter(is.na(FH_year))

### ### Extract top 5 events for each in Student Case 

destabilizing_student <- studentcase %>%
  filter(Type == "Destabilizing Event") %>%
  count(Category, name = "count") %>%
  arrange(desc(count)) %>%
  slice_max(count, n = 5)

symptoms_student <- studentcase %>%
  filter(Type == "Symptom") %>%
  count(Category, name = "count") %>%
  arrange(desc(count)) %>%
  slice_max(count, n = 5)

precursors_student <- studentcase %>%
  filter(Type == "Precursor") %>%
  count(Category, name = "count") %>%
  arrange(desc(count)) %>%
  slice_max(count, n = 5)

resistance_student <- studentcase %>%
  filter(Type == "Resistance") %>%
  count(Category, name = "count") %>%
  arrange(desc(count)) %>%
  slice_max(count, n = 5)

### Combining the dataframes
combined_student <- bind_rows(
  destabilizing_student %>% mutate(type = "Destabilizing Event"),
  symptoms_student %>% mutate(type = "Symptom"),
  precursors_student %>% mutate(type = "Precursor"),
  resistance_student %>% mutate(type = "Resistance")
)

combined_student$type <- factor(combined_student$type, levels = c("Destabilizing Event", "Precursor", "Symptom", "Resistance"))

sc_plot <- ggplot(combined_student, aes(x = reorder(Category, count), y = count, fill = type)) + 
  geom_bar(stat = "identity") + 
  facet_wrap(~ type, scales = "free_y", ncol = 1) +  
  scale_fill_manual(values = c("Destabilizing Event" = "red",  
                               "Symptom" = "black", 
                               "Precursor" = "darkgray", 
                               "Resistance" = "lightgray")) + 
  labs(title = "", x = "Category", y = "Count") + 
  coord_flip() + 
  theme_minimal() + 
  theme(legend.position = "none", 
        strip.background = element_blank(), 
        strip.text = element_text(face = "bold"), 
        strip.placement = "top")

sc_plot

ggsave(
  filename = "sc_plot.png",  
  plot = sc_plot,            
  width = 8,                  
  height = 6,                  
  dpi = 300                    
)


######################### FREEDOM HOUSE DATASET ###############################

freedomhouse <- final %>%
  filter(!is.na(FH_year))

### ### Extract top 5 events for each in Freedom House

destabilizing <- freedomhouse %>%
  filter(Type == "Destabilizing Event") %>%
  count(Category, name = "count") %>%
  arrange(desc(count)) %>%
  slice_max(count, n = 5)

symptoms <- freedomhouse %>%
  filter(Type == "Symptom") %>%
  count(Category, name = "count") %>%
  arrange(desc(count)) %>%
  slice_max(count, n = 5)

precursors <- freedomhouse %>%
  filter(Type == "Precursor") %>%
  count(Category, name = "count") %>%
  arrange(desc(count)) %>%
  slice_max(count, n = 5)

resistance <- freedomhouse %>%
  filter(Type == "Resistance") %>%
  count(Category, name = "count") %>%
  arrange(desc(count)) %>%
  slice_max(count, n = 5)

### Combining the dataframes
combined_data <- bind_rows(
  destabilizing %>% mutate(type = "Destabilizing Event"),
  symptoms %>% mutate(type = "Symptom"),
  precursors %>% mutate(type = "Precursor"),
  resistance %>% mutate(type = "Resistance")
)

combined_data$type <- factor(combined_data$type, levels = c("Destabilizing Event", "Precursor", "Symptom", "Resistance"))

fh_plot <- ggplot(combined_data, aes(x = reorder(Category, count), y = count, fill = type)) + 
  geom_bar(stat = "identity") + 
  facet_wrap(~ type, scales = "free_y", ncol = 1) +  
  scale_fill_manual(values = c("Destabilizing Event" = "red",  
                               "Symptom" = "black", 
                               "Precursor" = "darkgray", 
                               "Resistance" = "lightgray")) + 
  labs(title = "", x = "Category", y = "Count") + 
  coord_flip() + 
  theme_minimal() + 
  theme(legend.position = "none", 
        strip.background = element_blank(), 
        strip.text = element_text(face = "bold"), 
        strip.placement = "top")

fh_plot

ggsave(
  filename = "fh_plot.png",  
  plot = fh_plot,            
  width = 8,                  
  height = 6,                  
  dpi = 300                    
)

###### Now let's assess overlap between the two dataset versions ###############


### Now comparing total events by type by proportion...

sc_plot_data <- studentcase %>%
  filter(Type %in% c("Symptom", "Precursor", "Resistance", "Destabilizing Event")) %>%
  mutate(Type = recode(Type, "Destabilizing Event" = "Destabilizing"),
         dataset = "Student Case") %>%
  count(Type, dataset) %>%
  group_by(dataset) %>%
  mutate(prop = n / sum(n)) %>%   
  ungroup()

# Prepare FH dataset
fh_plot_data <- freedomhouse %>%
  filter(Type %in% c("Symptom", "Precursor", "Resistance", "Destabilizing Event")) %>%
  mutate(Type = recode(Type, "Destabilizing Event" = "Destabilizing"),
         dataset = "Freedom House") %>%
  count(Type, dataset) %>%
  group_by(dataset) %>%
  mutate(prop = n / sum(n)) %>%   
  ungroup()

# Combine datasets
combined_plot_data <- bind_rows(sc_plot_data, fh_plot_data)

# Plot proportions
comparisonplot_scfh = ggplot(combined_plot_data, aes(x = Type, y = prop, fill = Type)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("Destabilizing" = "red",
                               "Precursor" = "gray45",
                               "Symptom" = "gray78",
                               "Resistance" = "gray6")) +
  labs(title = "", x = "", y = "Proportion of Events") +
  theme_minimal() +
  facet_wrap(~ dataset, scales = "fixed") +
  theme(legend.position = "right", plot.title = element_text(hjust = 0.5))

comparisonplot_scfh

ggsave(
  filename = "comparisonplot_scfh.png",  
  plot = comparisonplot_scfh,            
  width = 8,                  
  height = 6,                  
  dpi = 300                    
)


### Now patchworking top 5 event types

top5_comparison <- sc_plot | fh_plot +
  plot_annotation(
    title = "Top 5 Event Categories by Type",
    tag_levels = c('Student Case' = 'FH'),  
    theme = theme(
      plot.title = element_text(hjust = 0.5, face = "bold"),
      plot.tag = element_text(face = "bold", size = 14)
    )
  )

sc_plot_labeled <- sc_plot + 
  labs(title = "Student Case") +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

fh_plot_labeled <- fh_plot + 
  labs(title = "Freedom House") +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

top5_comparison <- sc_plot_labeled | fh_plot_labeled +
  plot_annotation(title = "Top 5 Event Categories by Type") &
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

top5_comparison


ggsave(
  filename = "top5_comparison_scfh.png",
  plot = top5_comparison,
  width = 12,
  height = 8,
  dpi = 300
)


